Introduction
These data come from the AMR (Antimicrobial Resistance) R package, which is frequently used in epidemiological investigations. I am particularly interested in the field of antibiotic resistance, and this R package contains a wide variety of suitable data sets that can be used for prediction analyses and data visualization.
# Print a compact, column-by-column preview of the `microorganisms` data set.
glimpse(microorganisms)
## Rows: 70,026
## Columns: 16
## $ mo <mo> "F_FUNGUS", "B_GRAMN", "B_GRAMP", "UNKNOWN", "F_YEAST", "B_[…
## $ fullname <chr> "(unknown fungus)", "(unknown Gram-negatives)", "(unknown G…
## $ kingdom <chr> "Fungi", "Bacteria", "Bacteria", "(unknown kingdom)", "Fung…
## $ phylum <chr> "(unknown phylum)", "(unknown phylum)", "(unknown phylum)",…
## $ class <chr> "(unknown class)", "(unknown class)", "(unknown class)", "(…
## $ order <chr> "(unknown order)", "(unknown order)", "(unknown order)", "(…
## $ family <chr> "(unknown family)", "(unknown family)", "(unknown family)",…
## $ genus <chr> "(unknown genus)", "(unknown Gram-negatives)", "(unknown Gr…
## $ species <chr> "(unknown species)", "(unknown species)", "(unknown species…
## $ subspecies <chr> "(unknown subspecies)", "(unknown subspecies)", "(unknown s…
## $ rank <chr> "species", "species", "species", "(unknown rank)", "species…
## $ ref <chr> NA, NA, NA, NA, NA, NA, NA, NA, "Tahon et al., 2018", "", "…
## $ species_id <chr> "", "", "", "", "", "", "", "", "797965", "9164ea7340beaa54…
## $ source <chr> "manually added", "manually added", "manually added", "manu…
## $ prevalence <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2,…
## $ snomed <list> <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, <>, "1…
# Print a compact, column-by-column preview of the `antibiotics` data set.
glimpse(antibiotics)
## Rows: 456
## Columns: 14
## $ ab <ab> "AMA", "FCT", "ACM", "ASP", "ALS", "AMK", "AKF", "AMX", "…
## $ atc <chr> "J04AA01", "D01AE21", NA, NA, "J04BA03", "J01GB06", NA, …
## $ cid <dbl> 4649, 3366, 6450012, 49787020, 8954, 37768, NA, 33613, 2…
## $ name <chr> "4-aminosalicylic acid", "5-fluorocytosine", "Acetylmide…
## $ group <chr> "Antimycobacterials", "Antifungals/antimycotics", "Macro…
## $ atc_group1 <chr> "Drugs for treatment of tuberculosis", "Antifungals for …
## $ atc_group2 <chr> "Aminosalicylic acid and derivatives", "Other antifungal…
## $ abbreviations <list> "", <"5flc", "fluo">, "", "", "", <"ak", "ami", "amik",…
## $ synonyms <list> <"aminopar", "aminosalicylic", "aminosalicylic acid", "…
## $ oral_ddd <dbl> 12.00, NA, NA, NA, 0.33, NA, NA, 1.50, 1.50, NA, NA, NA,…
## $ oral_units <chr> "g", NA, NA, NA, "g", NA, NA, "g", "g", NA, NA, NA, "g",…
## $ iv_ddd <dbl> NA, NA, NA, NA, NA, 1.0, NA, 3.0, 3.0, NA, 35.0, NA, 6.0…
## $ iv_units <chr> NA, NA, NA, NA, NA, "g", NA, "g", "g", NA, "mg", NA, "g"…
## $ loinc <list> <>, <"10974-4", "23805-5", "25142-1", "25143-9", "3639-…
# Print a compact, column-by-column preview of the `example_isolates` data set.
glimpse(example_isolates)
## Rows: 2,000
## Columns: 49
## $ date <date> 2002-01-02, 2002-01-03, 2002-01-07, 2002-01-07, 2002-…
## $ hospital_id <fct> D, D, B, B, B, B, D, D, B, B, D, D, D, D, D, B, B, B, …
## $ ward_icu <lgl> FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TR…
## $ ward_clinical <lgl> TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, FA…
## $ ward_outpatient <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE…
## $ age <dbl> 65, 65, 45, 45, 45, 45, 78, 78, 45, 79, 67, 67, 71, 71…
## $ gender <chr> "F", "F", "F", "F", "F", "F", "M", "M", "F", "F", "M",…
## $ patient_id <chr> "A77334", "A77334", "067927", "067927", "067927", "067…
## $ mo <mo> "B_ESCHR_COLI", "B_ESCHR_COLI", "B_STPHY_EPDR", "B_STPH…
## $ PEN <rsi> R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, …
## $ OXA <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ FLC <rsi> NA, NA, R, R, R, R, S, S, R, S, S, S, NA, NA, NA, NA, …
## $ AMX <rsi> NA, NA, NA, NA, NA, NA, R, R, NA, NA, NA, NA, NA, NA, …
## $ AMC <rsi> I, I, NA, NA, NA, NA, S, S, NA, NA, S, S, I, I, R, I, …
## $ AMP <rsi> NA, NA, NA, NA, NA, NA, R, R, NA, NA, NA, NA, NA, NA, …
## $ TZP <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ CZO <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ FEP <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ CXM <rsi> I, I, R, R, R, R, S, S, R, S, S, S, S, S, NA, S, S, R,…
## $ FOX <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ CTX <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, S, S, …
## $ CAZ <rsi> NA, NA, R, R, R, R, R, R, R, R, R, R, NA, NA, NA, S, S…
## $ CRO <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, S, S, …
## $ GEN <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ TOB <rsi> NA, NA, NA, NA, NA, NA, S, S, NA, NA, NA, NA, S, S, NA…
## $ AMK <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ KAN <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ TMP <rsi> R, R, S, S, R, R, R, R, S, S, NA, NA, S, S, S, S, S, R…
## $ SXT <rsi> R, R, S, S, NA, NA, NA, NA, S, S, NA, NA, S, S, S, S, …
## $ NIT <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ FOS <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ LNZ <rsi> R, R, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, R, R, R,…
## $ CIP <rsi> NA, NA, NA, NA, NA, NA, NA, NA, S, S, NA, NA, NA, NA, …
## $ MFX <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ VAN <rsi> R, R, S, S, S, S, S, S, S, S, NA, NA, R, R, R, R, R, S…
## $ TEC <rsi> R, R, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, R, R, R,…
## $ TCY <rsi> R, R, S, S, S, S, S, S, S, I, S, S, NA, NA, I, R, R, S…
## $ TGC <rsi> NA, NA, S, S, S, S, S, S, S, NA, S, S, NA, NA, NA, R, …
## $ DOX <rsi> NA, NA, S, S, S, S, S, S, S, NA, S, S, NA, NA, NA, R, …
## $ ERY <rsi> R, R, R, R, R, R, S, S, R, S, S, S, R, R, R, R, R, R, …
## $ CLI <rsi> R, R, NA, NA, NA, R, NA, NA, NA, NA, NA, NA, R, R, R, …
## $ AZM <rsi> R, R, R, R, R, R, S, S, R, S, S, S, R, R, R, R, R, R, …
## $ IPM <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, S, S, …
## $ MEM <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ MTR <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ CHL <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ COL <rsi> NA, NA, R, R, R, R, R, R, R, R, R, R, NA, NA, NA, R, R…
## $ MUP <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ RIF <rsi> R, R, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, R, R, R,…
# Print a compact, column-by-column preview of the `WHONET` data set.
glimpse(WHONET)
## Rows: 500
## Columns: 53
## $ `Identification number` <chr> "fe41d7bafa", "91f175ec37", "cc4015…
## $ `Specimen number` <int> 1748, 1767, 1343, 1894, 1739, 1846,…
## $ Organism <chr> "SPN", "eco", "eco", "MAP", "PVU", …
## $ Country <chr> "Belgium", "The Netherlands", "The …
## $ Laboratory <chr> "National Laboratory of Belgium", "…
## $ `Last name` <chr> "Abel", "Delacroix", "Steensen", "B…
## $ `First name` <chr> "B.", "F.", "F.", "L.", "W.", "J.",…
## $ Sex <chr> "F", "M", "M", "M", "M", "F", "F", …
## $ Age <dbl> 68, 89, 85, 62, 86, 53, 77, 53, 63,…
## $ `Age category` <chr> "55-74", "75+", "75+", "55-74", "75…
## $ `Date of admission` <date> 2005-01-12, 2006-07-30, 2014-03-05…
## $ `Specimen date` <date> 2005-01-30, 2006-08-16, 2014-03-14…
## $ `Specimen type` <chr> "Urine", "Urine", "Urine", "Urine",…
## $ `Specimen type (Numeric)` <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
## $ Reason <chr> "Unknown", "Unknown", "Unknown", "U…
## $ `Isolate number` <int> 1748, 1767, 1343, 1894, 1739, 1846,…
## $ `Organism type` <chr> "Bacteria", "Bacteria", "Bacteria",…
## $ Serotype <chr> "", "", "", "", "", "", "", "", "",…
## $ `Beta-lactamase` <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ ESBL <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ Carbapenemase <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ `MRSA screening test` <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ `Inducible clindamycin resistance` <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, …
## $ Comment <chr> "", "", "", "", "", "", "", "", "",…
## $ `Date of data entry` <date> 2005-01-30, 2006-08-16, 2014-03-14…
## $ AMP_ND10 <rsi> S, NA, S, R, R, S, NA, NA, R, NA, S…
## $ AMC_ED20 <rsi> S, S, S, NA, R, S, S, S, R, S, S, I…
## $ TZP_ED30 <rsi> S, NA, S, NA, S, S, S, NA, S, NA, S…
## $ FEP_ED30 <rsi> NA, NA, NA, NA, NA, NA, S, NA, S, N…
## $ CTX_ED5 <rsi> NA, NA, NA, NA, S, S, S, NA, R, NA,…
## $ FOX_ED30 <rsi> NA, NA, NA, NA, R, S, S, NA, R, NA,…
## $ CAZ_ED10 <rsi> R, R, NA, R, S, S, S, R, S, R, S, S…
## $ CRO_ED30 <rsi> NA, NA, NA, NA, S, S, S, NA, R, NA,…
## $ CIP_ED5 <rsi> NA, NA, NA, NA, S, S, S, S, S, NA, …
## $ AMK_ED30 <rsi> NA, NA, NA, NA, NA, NA, NA, NA, S, …
## $ GEN_ED10 <rsi> R, S, NA, R, S, S, S, S, S, S, S, S…
## $ TOB_ED10 <rsi> R, NA, NA, R, S, S, S, NA, S, NA, S…
## $ SXT_ED1.2 <rsi> S, R, NA, NA, R, S, S, S, R, S, S, …
## $ IPM_ND10 <rsi> NA, NA, NA, NA, I, S, S, NA, S, NA,…
## $ PEN_ND1 <rsi> S, R, S, R, R, R, R, R, R, R, R, R,…
## $ AMP_ND2 <rsi> S, NA, S, R, R, S, NA, NA, R, NA, S…
## $ AMC_ND2 <rsi> S, S, S, NA, R, S, S, S, R, S, S, I…
## $ CHL_ND30 <rsi> NA, NA, NA, NA, NA, NA, NA, NA, R, …
## $ VAN_ED5 <rsi> S, S, NA, NA, R, R, R, S, R, S, R, …
## $ OXA_ED1 <rsi> NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ERY_ED15 <rsi> S, S, NA, NA, R, R, R, S, R, S, R, …
## $ CLI_ED2 <rsi> NA, S, S, S, NA, NA, NA, NA, NA, NA…
## $ TCY_ED30 <rsi> S, S, NA, NA, R, NA, NA, R, R, S, R…
## $ RIF_ED5 <rsi> NA, NA, NA, NA, R, R, R, NA, R, NA,…
## $ PEN_EE <rsi> S, R, S, R, R, R, R, R, R, R, R, R,…
## $ AMP_EE <rsi> S, NA, S, R, R, S, NA, NA, R, NA, S…
## $ CRO_EE <rsi> NA, NA, NA, NA, S, S, S, NA, R, NA,…
## $ CIP_EE <rsi> NA, NA, NA, NA, S, S, S, S, S, NA, …
Question 1
Which Phylum of Fungi is most prevalent within the microorganisms dataset?
Methods
# Question 1: count the rows recorded for each fungal phylum in
# `microorganisms` and show the counts as an interactive horizontal bar chart.
ggthemr("light") # Set theme

Plot1 <- microorganisms %>%
  filter(kingdom == "Fungi") %>% # Keep only fungi
  count(phylum) %>% # Rows per phylum, stored in column `n`
  ggplot() +
  aes(
    y = reorder(phylum, +n), # Order the phyla on the y-axis by their count
    x = n, # Map counts to the x-axis
    fill = phylum # Fill based on phylum
  ) +
  geom_col() +
  scale_x_log10() + # Show the counts on a log10 scale
  labs(
    y = "Phylums", # Y-axis label
    x = "Count", # X-axis label
    title = "Fungi Phylums examined within microorganisms dataset" # Add title
  ) +
  theme(
    legend.position = "none", # Remove legend
    plot.background = element_rect(fill = "#f7f7f7") # Change background color
  )

# Convert to an interactive plotly widget; hover label shows only n and phylum
ggplotly(Plot1, tooltip = c("n", "phylum"))
Answer
The fungal phylum Ascomycota appears to be the most prevalent within this dataset.
Question 2
Is there a linear relationship between oral and iv dosages of antibiotics?
Methods
# Question 2: interactive scatter plot of the oral dose column (`oral_ddd`)
# against the IV dose column (`iv_ddd`) of `antibiotics`, both axes on a
# log10 scale.
# NOTE(review): the glimpse of `iv_units` shows both "g" and "mg"; the values
# are plotted as-is without converting to a common unit -- confirm that the
# two columns are directly comparable.
ggthemr("light") # Set theme

Plot2 <- ggplot(antibiotics) + # Use antibiotics dataset
  aes(
    x = oral_ddd, # Map x-axis to oral dosages
    y = iv_ddd # Map y-axis to IV dosages
  ) +
  geom_point() + # Create scatterplot
  labs(
    y = "IV Dosage", # Y-axis label
    x = "Oral Dosage", # X-axis label
    title = "Oral vs IV Dosages of Antibiotics" # Add title
  ) +
  scale_x_log10() + # Log scale x-axis
  scale_y_log10() + # Log scale y-axis
  theme(plot.background = element_rect(fill = "#f7f7f7")) # Change background color

# Convert to an interactive plotly widget
ggplotly(Plot2)
Answer
Yes, with both axes on a log scale, we can observe a positive linear relationship between the oral and IV dosages of antibiotics.
Question 3
In the future, do we expect Gram-positive Bacteria to become more or less resistant to the antibiotic Doxycycline?
Methods
# Question 3: forecast doxycycline (DOX) resistance in Gram-positive isolates
# from `example_isolates` and plot the yearly values with error bars.
ggthemr("light") # Set theme

Plot3 <- example_isolates %>%
  # Subset to Gram-positive bacteria
  filter(mo_gramstain(mo, language = NULL) == "Gram-positive") %>%
  resistance_predict(
    col_ab = "DOX", # Predict resistance to doxycycline
    col_date = "date", # Column holding the isolate date
    model = "binomial",
    info = FALSE,
    minimum = 15
  ) %>%
  ggplot() +
  aes(
    x = year, # Map x-axis to year
    y = value # Map y-axis to resistance value
  ) +
  geom_col() + # Create barplot
  geom_errorbar(
    aes(
      ymin = se_min, # Lower end of the error bar
      ymax = se_max # Upper end of the error bar
    ),
    color = "#31a183", # Change color
    width = 0.6 # Width of the error bars
  ) +
  scale_y_continuous(
    limits = c(0, 1), # Limits of y-axis
    breaks = seq(0, 1, 0.1), # Add a break every 0.1
    labels = paste0(seq(0, 100, 10), "%") # Show the breaks as percentages
  ) +
  labs(
    title = "Forecast of Doxycycline Resistance in Gram-positive Bacteria", # Add title
    y = "%R", # Label y-axis
    x = "Year" # Label x-axis
  ) +
  theme(plot.background = element_rect(fill = "#f7f7f7")) # Change background color

# Convert to an interactive plotly widget
ggplotly(Plot3)
Answer
Based on this plot, we can conclude that resistance to Doxycycline in Gram-positive bacteria will increase in the future.
Question 4
Geographically, where was the data from the WHONET dataset compiled?
Methods
# Question 4: count the WHONET records per country and draw the counts as a
# choropleth on an interactive 3D globe.
img <- image_url("blue-marble") # Texture for globe

# Translate the country names to three-letter codes and count rows per code.
# case_when() has no fallback branch, so any country not listed below gets
# an NA code.
Updated_countries <- WHONET %>%
  mutate(
    code = case_when(
      Country == "Belgium" ~ "BEL",
      Country == "The Netherlands" ~ "NLD",
      Country == "Denmark" ~ "DNK",
      Country == "France" ~ "FRA",
      Country == "Germany" ~ "DEU"
    )
  ) %>%
  count(code) # Rows per country code, stored in column `n`

create_globe() %>%
  globe_img_url(img) %>% # Change globe texture to img texture
  globe_choropleth(
    data = Updated_countries, # Use Updated_countries dataset
    coords(
      country = code, # Identify countries by the code column
      cap_color = n, # Color countries based on count
      altitude = n # Elevate countries based on count
    )
  ) %>%
  globe_background("#f7f7f7") %>% # Change background color
  scale_choropleth_cap_color() %>% # Include cap color
  scale_choropleth_altitude(0.06, 0.1) # Reduce altitude scale
Answer
The data in the WHONET dataset seems to come from locations in central and northern Europe.